# Load the cleaned arrest records
Arrests <- read_csv(file = "Police_Arrests_Clean.csv")
## Rows: 37311 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (11): Charge, Street, City, State, Race, Gender, Ethnicity, Arrest_Type...
## dbl   (6): Incident_Id, Zip, Age, latitude, longitude, Object_Id
## dttm  (1): Arrest_Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the cleaned incident reports
Incidents <- read_csv(file = "Police_Incidents_Clean.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 109004 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (11): Agency, Offense, Street, City, State, Reported.As, Premise, Forci...
## dbl   (6): Incident_Id, Zip, Victim_Age, Latitude, Longitude, Object_Id
## dttm  (3): Report_Date, Occur_Date, Found_Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Data Inspection

# Inspect the column types and first values of each dataset
Arrests |> glimpse()
## Rows: 37,311
## Columns: 18
## $ Incident_Id   <dbl> 24988, 26403, 24155, 34658, 25254, 26805, 28379, 27807, …
## $ Charge        <chr> "SEX OFFENSE-2ND DEG", "2ND DEGREE TRESPASS", "COMMON LA…
## $ Street        <chr> "200 GARDNER CIR", "224 KNOLLS ST", "128 JOHNSON STREET"…
## $ City          <chr> "CHAPEL HILL", "CHAPEL HILL", "CHAPEL HILL", "CHAPEL HIL…
## $ State         <chr> "NC", "NC", "NC", "NC", "NC", "NC", "NC", "NC", "NC", "N…
## $ Zip           <dbl> 27516, 27516, 27516, 27516, 27516, 27516, 27516, 27516, …
## $ Arrest_Date   <dttm> 2015-07-05 22:13:00, 2011-03-12 23:01:00, 2014-07-24 03…
## $ Age           <dbl> 38, 56, 27, 59, 19, 44, 26, 25, 29, 19, 49, 24, 18, 19, …
## $ Race          <chr> "W", "B", "B", "W", "W", "B", "B", "B", "B", "B", "B", "…
## $ Gender        <chr> "M", "M", "M", "F", "F", "F", "M", "M", "M", "F", "M", "…
## $ Ethnicity     <chr> "H", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", N…
## $ Arrest_Type   <chr> "ON VIEW", "SUMMONED/CITED", "TAKEN INTO CUSTODY (WARRAN…
## $ Drugs_Alcohol <chr> "Y", "U", "U", "Y", "Y", "N", "N", "Y", "Y", "N", "Y", "…
## $ Weapon        <chr> "UNARMED", "UNARMED", "UNARMED", "UNARMED", "UNARMED", "…
## $ Disposition   <chr> "CLEARED BY ARREST", "CLEARED BY ARREST", "CLEARED BY AR…
## $ latitude      <dbl> 35.89088, 35.90571, 35.90513, 35.92202, 35.93296, 35.947…
## $ longitude     <dbl> -79.07104, -79.06835, -79.06875, -79.07185, -79.06871, -…
## $ Object_Id     <dbl> 1, 3, 4, 5, 8, 9, 10, 12, 14, 15, 17, 18, 19, 20, 22, 23…
Incidents |> glimpse()
## Rows: 109,004
## Columns: 20
## $ Incident_Id   <dbl> 74984, 74487, 74299, 75136, 74985, 74445, 75130, 74990, …
## $ Agency        <chr> "CHPD", "CHPD", "CHPD", "CHPD", "CHPD", "CHPD", "CHPD", …
## $ Offense       <chr> "TRESPASSING", "DOMESTIC DISTURBANCE/NO ASSAULT", "DOMES…
## $ Street        <chr> "1301 FORDHAM BLVD", "103 PINEGATE CIR", "377 S ESTES DR…
## $ City          <chr> "CHAPEL HILL", "CHAPEL HILL", "CHAPEL HILL", "CHAPEL HIL…
## $ State         <chr> "NC", "NC", "NC", "NC", "NC", "NC", "NC", "NC", "NC", "N…
## $ Zip           <dbl> 27517, 27514, 27517, 27517, 27514, 27516, 27516, 27514, …
## $ Report_Date   <dttm> 2010-02-19 00:54:00, 2010-01-20 00:00:00, 2010-01-10 11…
## $ Occur_Date    <dttm> 2010-02-19 00:53:00, 2010-01-19 23:59:00, 2010-01-10 11…
## $ Found_Date    <dttm> 2010-02-19 00:54:00, 2010-01-20 00:00:00, 2010-01-10 11…
## $ Reported.As   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ Premise       <chr> "HOTEL/MOTEL", "HOME OF VICTIM - OTHER DWELLING", "HOME …
## $ Forcible      <chr> "N", "N", "Y", "N", "Y", "N", "Y", "N", "N", "N", NA, "N…
## $ Weapon        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ Victim_Age    <dbl> NA, 52, 35, 33, 30, NA, 43, NA, 21, 66, 76, NA, 22, NA, …
## $ Victim_Race   <chr> NA, "B", "B", "W", "W", "W", "W", NA, "I", "W", "W", NA,…
## $ Victim_Gender <chr> NA, "F", "F", "F", "M", "M", "F", NA, "M", "F", "F", NA,…
## $ Latitude      <dbl> 35.93626, 35.94832, 35.92433, 35.94095, 35.91457, 35.905…
## $ Longitude     <dbl> -79.02344, -79.00841, -79.02229, -79.00953, -79.05288, -…
## $ Object_Id     <dbl> 1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19,…
# Column names of the arrests table
colnames(Arrests)
##  [1] "Incident_Id"   "Charge"        "Street"        "City"         
##  [5] "State"         "Zip"           "Arrest_Date"   "Age"          
##  [9] "Race"          "Gender"        "Ethnicity"     "Arrest_Type"  
## [13] "Drugs_Alcohol" "Weapon"        "Disposition"   "latitude"     
## [17] "longitude"     "Object_Id"
# Column names of the incidents table
colnames(Incidents)
##  [1] "Incident_Id"   "Agency"        "Offense"       "Street"       
##  [5] "City"          "State"         "Zip"           "Report_Date"  
##  [9] "Occur_Date"    "Found_Date"    "Reported.As"   "Premise"      
## [13] "Forcible"      "Weapon"        "Victim_Age"    "Victim_Race"  
## [17] "Victim_Gender" "Latitude"      "Longitude"     "Object_Id"
# Preview the first six arrest rows
Arrests |> head()
## # A tibble: 6 × 18
##   Incident_Id Charge    Street City  State   Zip Arrest_Date           Age Race 
##         <dbl> <chr>     <chr>  <chr> <chr> <dbl> <dttm>              <dbl> <chr>
## 1       24988 SEX OFFE… 200 G… CHAP… NC    27516 2015-07-05 22:13:00    38 W    
## 2       26403 2ND DEGR… 224 K… CHAP… NC    27516 2011-03-12 23:01:00    56 B    
## 3       24155 COMMON L… 128 J… CHAP… NC    27516 2014-07-24 03:41:00    27 B    
## 4       34658 POSS COC… 300 S… CHAP… NC    27516 2019-06-18 14:55:00    59 W    
## 5       25254 PROVISIO… 119 N… CHAP… NC    27516 2017-12-21 04:40:00    19 W    
## 6       26805 FAIL TO … 100 C… CHAP… NC    27516 2015-10-10 16:11:00    44 B    
## # ℹ 9 more variables: Gender <chr>, Ethnicity <chr>, Arrest_Type <chr>,
## #   Drugs_Alcohol <chr>, Weapon <chr>, Disposition <chr>, latitude <dbl>,
## #   longitude <dbl>, Object_Id <dbl>
# Preview the first six incident rows
Incidents |> head()
## # A tibble: 6 × 20
##   Incident_Id Agency Offense        Street City  State   Zip Report_Date        
##         <dbl> <chr>  <chr>          <chr>  <chr> <chr> <dbl> <dttm>             
## 1       74984 CHPD   TRESPASSING    1301 … CHAP… NC    27517 2010-02-19 00:54:00
## 2       74487 CHPD   DOMESTIC DIST… 103 P… CHAP… NC    27514 2010-01-20 00:00:00
## 3       74299 CHPD   DOMESTIC SIMP… 377 S… CHAP… NC    27517 2010-01-10 11:30:00
## 4       75136 CHPD   B&E RESIDENCE… 230 S… CHAP… NC    27517 2010-02-26 14:16:00
## 5       74985 CHPD   SIMPLE ASSAULT 201 E… CHAP… NC    27514 2010-02-19 01:29:00
## 6       74445 CHPD   B&E RESIDENCE… 515 S… CHAP… NC    27516 2010-01-17 18:30:00
## # ℹ 12 more variables: Occur_Date <dttm>, Found_Date <dttm>, Reported.As <chr>,
## #   Premise <chr>, Forcible <chr>, Weapon <chr>, Victim_Age <dbl>,
## #   Victim_Race <chr>, Victim_Gender <chr>, Latitude <dbl>, Longitude <dbl>,
## #   Object_Id <dbl>

Convert dates

# Derive calendar fields from the arrest timestamp: plain date, year, numeric
# and labelled month, and season.
Arrests <- mutate(
  Arrests,
  ArrestDate = as.Date(Arrest_Date),
  year = year(ArrestDate),
  month_num = month(ArrestDate),
  month = month(ArrestDate, label = TRUE),
  # Look the season up by month number (position 1 = January ... 12 = December);
  # a missing month number yields a missing season.
  season = c(
    "Winter", "Winter",
    "Spring", "Spring", "Spring",
    "Summer", "Summer", "Summer",
    "Fall", "Fall", "Fall",
    "Winter"
  )[month_num]
)

# Derive the same calendar fields for incidents, based on when the incident
# occurred (Occur_Date).
Incidents <- Incidents |>
  mutate(
    IncidentDate = as.Date(Occur_Date),
    year = year(IncidentDate),
    month_num = month(IncidentDate),
    month = month(IncidentDate, label = TRUE),
    # The branches are mutually exclusive; unmatched (NA) months stay NA.
    season = case_when(
      month_num %in% 6:8 ~ "Summer",
      month_num %in% 9:11 ~ "Fall",
      month_num %in% 3:5 ~ "Spring",
      month_num %in% c(12, 1, 2) ~ "Winter"
    )
  )

# Confirm the new date columns were added to Arrests
Arrests |> head()
## # A tibble: 6 × 23
##   Incident_Id Charge    Street City  State   Zip Arrest_Date           Age Race 
##         <dbl> <chr>     <chr>  <chr> <chr> <dbl> <dttm>              <dbl> <chr>
## 1       24988 SEX OFFE… 200 G… CHAP… NC    27516 2015-07-05 22:13:00    38 W    
## 2       26403 2ND DEGR… 224 K… CHAP… NC    27516 2011-03-12 23:01:00    56 B    
## 3       24155 COMMON L… 128 J… CHAP… NC    27516 2014-07-24 03:41:00    27 B    
## 4       34658 POSS COC… 300 S… CHAP… NC    27516 2019-06-18 14:55:00    59 W    
## 5       25254 PROVISIO… 119 N… CHAP… NC    27516 2017-12-21 04:40:00    19 W    
## 6       26805 FAIL TO … 100 C… CHAP… NC    27516 2015-10-10 16:11:00    44 B    
## # ℹ 14 more variables: Gender <chr>, Ethnicity <chr>, Arrest_Type <chr>,
## #   Drugs_Alcohol <chr>, Weapon <chr>, Disposition <chr>, latitude <dbl>,
## #   longitude <dbl>, Object_Id <dbl>, ArrestDate <date>, year <dbl>,
## #   month_num <dbl>, month <ord>, season <chr>
# Confirm the new date columns were added to Incidents
Incidents |> head()
## # A tibble: 6 × 25
##   Incident_Id Agency Offense        Street City  State   Zip Report_Date        
##         <dbl> <chr>  <chr>          <chr>  <chr> <chr> <dbl> <dttm>             
## 1       74984 CHPD   TRESPASSING    1301 … CHAP… NC    27517 2010-02-19 00:54:00
## 2       74487 CHPD   DOMESTIC DIST… 103 P… CHAP… NC    27514 2010-01-20 00:00:00
## 3       74299 CHPD   DOMESTIC SIMP… 377 S… CHAP… NC    27517 2010-01-10 11:30:00
## 4       75136 CHPD   B&E RESIDENCE… 230 S… CHAP… NC    27517 2010-02-26 14:16:00
## 5       74985 CHPD   SIMPLE ASSAULT 201 E… CHAP… NC    27514 2010-02-19 01:29:00
## 6       74445 CHPD   B&E RESIDENCE… 515 S… CHAP… NC    27516 2010-01-17 18:30:00
## # ℹ 17 more variables: Occur_Date <dttm>, Found_Date <dttm>, Reported.As <chr>,
## #   Premise <chr>, Forcible <chr>, Weapon <chr>, Victim_Age <dbl>,
## #   Victim_Race <chr>, Victim_Gender <chr>, Latitude <dbl>, Longitude <dbl>,
## #   Object_Id <dbl>, IncidentDate <date>, year <dbl>, month_num <dbl>,
## #   month <ord>, season <chr>

Question 1a: Yearly Trend

# Line-and-point chart of the total number of arrests per year
ggplot(count(Arrests, year), aes(year, n)) +
  geom_line() +
  geom_point() +
  theme_minimal() +
  labs(title = "Yearly Arrests") +
  labs(x = "Year", y = "Number of Arrests")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

Observations:

- Arrests start high in ~2010, with counts over 3,500 per year.
- Arrest numbers remain relatively stable between ~2010 and 2016, fluctuating slightly between ~3,200–3,600.
- Beginning around 2017–2018, there is a noticeable decline in arrests.
- The drop becomes much sharper around 2019–2021, falling to nearly half of earlier levels. This likely corresponds with the COVID-19 pandemic and lockdowns, when many cities saw decreased police activity and fewer arrests. The 2021 total is also understated because the dataset contains no arrest records from April to September 2021.
- After 2020, there’s a small rebound, but the counts remain significantly lower than pre-2017 levels.

Question 1b: Monthly Trend

# Line chart of arrests per calendar month across the whole period.
Arrests %>%
  count(year, month_num) %>%
  # Represent each year-month pair by the first day of that month so it can be
  # placed on a continuous date axis.
  mutate(YearMonth = as.Date(paste(year, month_num, "01", sep = "-"))) %>%
  ggplot(aes(x = YearMonth, y = n)) +
  geom_line() +
  labs(
    title = "Monthly Arrests Over Time",
    x = "Year-Month", y = "Number of Arrests"
  ) +
  scale_x_date(date_breaks = "6 months", date_labels = "%Y-%m") +
  # The complete theme must be added first: theme_minimal() replaces every
  # theme element, so adding it after theme() would discard the label rotation.
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

Observations:

2010–2016:
Arrests fluctuate between ~250–400 per month, with periodic peaks (approaching ~500) and valleys.
There appears to be some seasonality: higher counts in some mid-year (likely summer) months, lower in others (likely winter).

2017–2018:
Monthly counts start to decline gradually compared to earlier years. The peaks are less pronounced, staying mostly below 350.

2019–2020:
Sharp decline begins late 2019 and early 2020 — arrests drop rapidly to below 200 per month.
This drop aligns with the COVID-19 pandemic and lockdowns, when many jurisdictions reduced arrests.

2021–2024:
Counts remain low (~100–150 per month) compared to pre-2019 levels.
There are occasional small spikes, but they stay much lower than earlier years, suggesting a lasting reduction.
- Stable high activity (2010–2016): Monthly arrests consistently in the 300–400+ range.
- Gradual decline (2017–2018): Decrease starts before COVID.
- Sharp drop (2019–2020): Likely due to pandemic-related restrictions and changes in enforcement.
- Sustained lower levels (2021 onward): Arrests remain at about half or less of their previous levels — indicating potential long-term shifts.

HEAT MAP

# Tile heatmap of arrest counts: one tile per (month, year), with darker blue
# meaning more arrests.
ggplot(
  count(Arrests, year, month),
  aes(month, factor(year), fill = n)
) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low = "skyblue", high = "darkblue") +
  labs(title = "Heatmap of Arrests by Month and Year") +
  labs(x = "Month", y = "Year", fill = "Arrests")

Observations: 
2010–2016:
Arrest levels are high overall (darker blue tiles on the sky-blue-to-dark-blue scale) with a clear concentration in summer months (June–August) — particularly July & August, which show the darkest tiles.
Winter months (January–February, December) consistently show lower levels (lighter blue shades), which is expected due to seasonality.

2017–2018:
Arrests begin to decline slightly — the heatmap colors shift towards lighter blues even in summer months, suggesting fewer arrests compared to earlier years.

2019–2020:
A dramatic drop appears starting in 2020 — most months, including summer, turn light (sky) blue, indicating very low arrest counts.
The drop is most pronounced in spring and summer 2020 — coinciding with COVID-19 restrictions.

2021–2024:
Arrest levels remain low (light blue tiles), and the distinct summer peaks almost disappear.
The seasonality seems flattened — suggesting the usual summer increase has weakened post-pandemic.

Seasonal pattern:
Consistent before 2017:
- Peaks: June–August
- Lows: January–February & December
Post-2020, this pattern weakens.

Boxplot: Distribution of arrests by month

# Arrest totals per (year, month), tagged with a season used for colouring.
# Rows without a month (and therefore without a season) are dropped.
monthly_counts <- Arrests %>%
  count(year, month) %>%
  mutate(
    # `month` is an ordered factor Jan..Dec, so its integer code is the month number
    month_num = as.numeric(month),
    # Season lookup by month number: Jan-Feb, Mar-May, Jun-Aug, Sep-Nov, Dec
    season = rep(
      c("Winter", "Spring", "Summer", "Fall", "Winter"),
      times = c(2, 3, 3, 3, 1)
    )[month_num]
  ) %>%
  filter(!is.na(season)) %>%
  # factor() only creates levels for values that are present
  mutate(season = factor(season))

# Fill/outline colour for each season, keyed by season name
season_colors <- setNames(
  c("skyblue3", "springgreen3", "indianred", "gold"),
  c("Winter", "Spring", "Summer", "Fall")
)

# Boxplot of monthly arrest counts, one box per month, coloured by season.
# The "{current_frame}" placeholder in the subtitle is filled in when the plot
# is animated.
p <- monthly_counts %>%
  ggplot(aes(x = month, y = n, group = month, fill = season, color = season)) +
  geom_boxplot() +
  scale_fill_manual(values = season_colors) +
  scale_color_manual(values = season_colors) +
  scale_y_continuous(breaks = seq(0, 500, 50)) +
  labs(
    title = "Distribution of Monthly Arrests by Month",
    subtitle = "Year: {current_frame}",
    x = "Month",
    y = "Number of Arrests",
    fill = "Season",
    color = "Season"
  ) +
  theme_minimal(base_size = 14) +
  theme(plot.subtitle = element_text(size = 18, face = "bold", hjust = 0.5))

# Animate the boxplot with one frame per value of `year`
anim <- p + transition_manual(frames = year)

# save animation: 
# animate(anim, width = 900, height = 600, fps = 1, duration = length(unique(monthly_counts$year)) * 4, renderer = gifski_renderer())
# anim_save("animated_boxplot_monthly_arrests_synced.gif")

General Trends:
The boxplots show clear seasonality:
 Winter (Jan–Feb, Dec) — lowest arrests, consistently around ~200–250.
 Spring (Mar–May) — arrests start increasing, reaching ~300–350.
 Summer (Jun–Aug) — peaks, especially July & August, with arrests reaching ~400–500.
 Fall (Sep–Nov) — begins to decline from summer peaks, stabilizing around ~300.

The seasonal pattern is most visible and consistent in the earlier years (2010–2016).
# Check which year-month combinations have no arrest records at all
# (investigating seasons that appeared to be missing from the plots above).

all_months <- month.abb

# Every year-month combination that could occur. Rows whose date is missing
# have year = NA; that NA is excluded here so it does not produce twelve
# spurious "missing" rows with an NA year.
all_combinations <- expand_grid(
  year = unique(Arrests$year[!is.na(Arrests$year)]),
  month = all_months
)

# Observed counts. `month` is an ordered factor in Arrests, so convert it to
# character to match the key type used in `all_combinations`.
actual_counts <- Arrests %>%
  count(year, month) %>%
  mutate(month = as.character(month))

# Left join + fill combinations that have no records with 0
complete_counts <- all_combinations %>%
  left_join(actual_counts, by = c("year", "month")) %>%
  mutate(n = replace_na(n, 0L)) %>%
  arrange(year, match(month, all_months))

# Show missing months
missing_months <- complete_counts %>% filter(n == 0)

print(missing_months)
## # A tibble: 8 × 3
##    year month     n
##   <dbl> <chr> <int>
## 1  2021 Apr       0
## 2  2021 May       0
## 3  2021 Jun       0
## 4  2021 Jul       0
## 5  2021 Aug       0
## 6  2021 Sep       0
## 7  2024 May       0
## 8  2024 Jun       0

Seasonal barplot

# Grouped bar chart: arrests per season, with seasons side by side within each year
Arrests %>%
  count(year, season) %>%
  filter(!is.na(season)) %>%
  ggplot(aes(x = factor(year), y = n, fill = season)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = season_colors) +
  labs(
    title = "Arrests by Season and Year",
    x = "Year",
    y = "Number of Arrests",
    fill = "Season"
  ) +
  theme_minimal()

General Observations:
1. In nearly every year with complete data, Summer shows the highest number of arrests, peaking around ~1,200 in 2010–2016 (2021 is an exception because June–August 2021 has no records).
2. Fall and Spring are similar to each other, generally slightly below Summer but still high.
3. Winter consistently has the lowest number of arrests each year — roughly ~50–70% of Summer.

Trend Over Time:
Arrest numbers declined steadily from ~2015 to 2019 across all seasons.
- Summer arrests dropped from ~1,100 in 2014 to ~600–700 by 2019.
- Fall and Spring show a similar downward trend.
- Winter remained the lowest but also declined.

Impact of COVID-19:
In 2020–2021, a dramatic drop is visible in all seasons:
Winter remains low.
Summer and Fall drop sharply, to about ~300 or lower — nearly ¼ of the pre-pandemic peak.
Spring also falls dramatically.
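This aligns with what we observed in earlier plots: the pandemic significantly reduced arrests. Note, however, that the dataset contains no arrests for April–September 2021, so the 2021 Spring, Summer and Fall bars understate actual activity.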

Post-2021 (pandemic ends in May 2023):
In 2022–2024, some recovery is observed:
- Slight uptick in Spring and Fall arrests.
- Summer remains much lower than its pre-2020 highs.
- Overall, the seasonal differences are less pronounced in recent years than before 2020.

Seasonal Patterns throughout the years:
Summer > Fall ≈ Spring > Winter

Calendar Heatmap

# Daily arrest totals (rows without an arrest date are dropped), tagged with
# the calendar year so each year can be drawn separately.
daily_arrests <- Arrests %>%
  drop_na(ArrestDate) %>%
  count(ArrestDate) %>%
  mutate(year = lubridate::year(ArrestDate))

# Years to render, in ascending order
years <- sort(unique(daily_arrests$year))

# Build one calendar heatmap per year. The plot is only assigned to `p`; the
# ggsave() call that would write it to disk is left disabled below.
for (yr in years) {
  cat("Rendering year:", yr, "\n")

  p <- daily_arrests %>%
    filter(year == yr) %>%
    ggplot_calendar_heatmap("ArrestDate", "n") +
    scale_fill_gradient(
      name = "Arrests",
      low = "white",
      high = "red",
      na.value = "grey"
    ) +
    labs(
      title = sprintf("Calendar Heatmap of Daily Arrests — Year: %s", yr),
      subtitle = "Darker colors indicate more arrests, grey = missing data"
    ) +
    theme_minimal(base_size = 14)

 # ggsave(sprintf("calendar_%s.png", yr), plot = p, width = 12, height = 8)
}
## Rendering year: 2010 
## Rendering year: 2011 
## Rendering year: 2012 
## Rendering year: 2013 
## Rendering year: 2014 
## Rendering year: 2015 
## Rendering year: 2016 
## Rendering year: 2017 
## Rendering year: 2018 
## Rendering year: 2019 
## Rendering year: 2020 
## Rendering year: 2021 
## Rendering year: 2022 
## Rendering year: 2023 
## Rendering year: 2024
# Collect the per-year calendar PNGs (full paths), sorted alphabetically
imgs <- sort(
  list.files(
    path = "heat map calendar for each year",
    pattern = "\\.png$",
    full.names = TRUE
  )
)

# Number of frames found
length(imgs)
## [1] 15
# Stitch the yearly PNG frames into a single looping GIF
output_gif <- "calendar_heatmap_animation.gif"

gifski(
  imgs,
  gif_file = output_gif,
  width = 1200,
  height = 800,
  delay = 2, # seconds each frame is shown
  loop = TRUE
)
## [1] "/Users/xuziqing/Library/CloudStorage/OneDrive-UniversityofNorthCarolinaatChapelHill/STOR 320/FINAL project/calendar_heatmap_animation.gif"

Question 2: How do the frequency and type of crime change across months and ZIP codes?

# Keep only incidents that have both coordinates, and add date, labelled month
# and year columns derived from the occurrence timestamp.
Incidents <- Incidents %>%
  filter(!is.na(Latitude), !is.na(Longitude)) %>%
  mutate(
    Date = as.Date(Occur_Date),
    Month = month(Date, label = TRUE),
    Year = year(Date)
  )
# Re-cleaned arrests data set (pulled from GitHub). Besides the raw columns it
# carries the derived ArrestDate, year, month_num, month and season columns.
ArrestsE <- read_csv(file = "Police_Arrests_Clean2.csv")
## Rows: 37309 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (13): Charge, Street, City, State, Race, Gender, Ethnicity, Arrest_Type...
## dbl   (8): Incident_Id, Zip, Age, latitude, longitude, Object_Id, year, mont...
## dttm  (1): Arrest_Date
## date  (1): ArrestDate
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Chapel Hill boundary as a plain data frame of vertices (columns V1 and V2),
# drawn as the outline layer on the arrest maps.
# progress_bar = FALSE keeps the download progress bar out of the knitted
# output; the boundary data itself is unchanged.
nc_places <- places(state = "NC", progress_bar = FALSE)
chapel_hill <- filter(nc_places, NAME == "Chapel Hill")

# NOTE(review): only the first ring of the first polygon of the first matching
# feature is kept. If the boundary has several parts, the others are not
# drawn -- confirm this is intended.
ch <- as.data.frame(chapel_hill$geometry[[1]][[1]][[1]])
## Retrieving data for the year 2024
##   |======================================================================| 100%
# List the columns available in the re-cleaned arrests data
colnames(ArrestsE)
##  [1] "Incident_Id"   "Charge"        "Street"        "City"         
##  [5] "State"         "Zip"           "Arrest_Date"   "Age"          
##  [9] "Race"          "Gender"        "Ethnicity"     "Arrest_Type"  
## [13] "Drugs_Alcohol" "Weapon"        "Disposition"   "latitude"     
## [17] "longitude"     "Object_Id"     "ArrestDate"    "year"         
## [21] "month_num"     "month"         "season"

Monthly arrest-count maps for each year, 2010–2024 (rows with missing values omitted)

library(dplyr)
library(ggplot2)

# Year to map. It drives the output folder, the row filter, the plot subtitle
# and the (currently disabled) file name, so changing it here retargets the
# whole chunk.
map_year <- 2010
out_dir <- paste0("maps_", map_year)

# create output folder if it doesn't exist
if (!dir.exists(out_dir)) dir.create(out_dir)

# Build one arrest-frequency map per calendar month
for (m in seq_along(month.abb)) {
  month_name <- month.abb[m]  # "Jan", "Feb", etc.

  # Snap each arrest to a 0.005-degree grid cell and count arrests per cell.
  # Rows without coordinates are dropped first so they do not form an NA cell.
  freq_grid <- Arrests %>%
    filter(year == map_year, month == month_name) %>%
    filter(!is.na(latitude), !is.na(longitude)) %>%
    mutate(
      rlat = round(latitude * 200) / 200,
      rlon = round(longitude * 200) / 200
    ) %>%
    count(rlat, rlon)

  # Nothing to draw for this month
  if (nrow(freq_grid) == 0) next

  p <- ggplot() +
    geom_tile(data = freq_grid, aes(x = rlon, y = rlat, fill = n)) +
    # Boundary outline; `ch` holds its vertices in columns V1 (x) and V2 (y)
    geom_path(data = ch, aes(x = V1, y = V2), color = "black") +
    coord_quickmap(xlim = c(-79.09, -78.99), ylim = c(35.86, 35.99)) +
    labs(
      title = "Arrest Frequency in Chapel Hill",
      subtitle = sprintf("%s - Month: %s", map_year, month_name),
      fill = "Arrest Count",
      x = "Longitude",
      y = "Latitude"
    ) +
    scale_fill_viridis_c() +
    theme_minimal()

  # Saving is disabled, so `p` is built but not written to disk on this run.
  # ggsave(
  #   filename = file.path(
  #     out_dir, sprintf("arrests_%s_%s.png", map_year, month_name)
  #   ),
  #   plot = p, width = 8, height = 6
  # )
}


# Collect every PNG under yearly_maps/, including sub-folders.
pngs <- list.files(
  path = "yearly_maps",
  pattern = "\\.png$",
  recursive = TRUE,
  full.names = TRUE
)

# Order the frames chronologically. A plain alphabetical sort is not enough
# because month abbreviations do not sort in calendar order ("Apr" < "Jan"),
# so the year and month are parsed from each file name instead
# (expected shape: arrests_YYYY_MMM.png).
month_order <- setNames(1:12, month.abb)

pngs_sorted <- tibble(path = pngs) %>%
  mutate(
    fname = basename(path),
    year = as.numeric(stringr::str_extract(fname, "\\d{4}")),
    month_str = stringr::str_extract(fname, paste(month.abb, collapse = "|")),
    month_num = month_order[month_str]
  ) %>%
  arrange(year, month_num) %>%
  pull(path)

# Fail with a clear message instead of handing gifski an empty frame list.
if (length(pngs_sorted) == 0) {
  stop("No PNG frames found under 'yearly_maps'.", call. = FALSE)
}

# Assemble the frames into a looping 1200 x 800 GIF.
gifski(
  png_files = pngs_sorted,
  gif_file = "all_years_arrests.gif",
  width = 1200,
  height = 800,
  delay = 2,   # seconds per frame
  loop = TRUE
)
## [1] "/Users/xuziqing/Library/CloudStorage/OneDrive-UniversityofNorthCarolinaatChapelHill/STOR 320/FINAL project/all_years_arrests.gif"